——— call libraries –
library("grid")
library("ggplot2")
library("gplots")
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library("viridis")
## Loading required package: viridisLite
library("reshape2")
library("gridExtra")
library("RColorBrewer")
library("scales")
##
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
##
## viridis_pal
library("stringr")
source('D:/Pembro-Fluvac/Analysis/PembroFluSpec_Ranalysis_files/PembroFluSpec_PlottingFunctions.R')
———– merge & qc data from spreadsheets ——————–
setwd("D:/Pembro-Fluvac/Analysis")
mergedData <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Tflow_freqParent_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
print("Data by year and cohort for T cell flow cytometry")
## [1] "Data by year and cohort for T cell flow cytometry"
table(mergedData$Cohort, mergedData$TimeCategory, mergedData$Year)
## , , = 1
##
##
## baseline late oneWeek
## aPD1 3 1 3
## Healthy 0 0 0
## nonPD1 0 0 0
##
## , , = 2
##
##
## baseline late oneWeek
## aPD1 3 2 3
## Healthy 12 11 12
## nonPD1 1 1 1
##
## , , = 3
##
##
## baseline late oneWeek
## aPD1 16 11 16
## Healthy 15 3 15
## nonPD1 1 0 1
fit <- lm(data=mergedData[ , - which( colnames(mergedData) == "Label" | colnames(mergedData) == "Subject" | colnames(mergedData) == "Cohort" |
colnames(mergedData) == "TimeCategory" | colnames(mergedData) == "TimePoint")],
formula = TflowBatch ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(mergedData) [ grep( paste(a, collapse="|"), colnames(mergedData)) ] <- paste0( colnames(mergedData)[ grep( paste(a, collapse="|"), colnames(mergedData)) ], ".batchEffect" ) }
serology <- read.csv(file= "D:/Pembro-Fluvac/Analysis/mergedData/SerologyMeasurements.csv", stringsAsFactors = F, header = T)
print("Data by year and cohort for serology")
## [1] "Data by year and cohort for serology"
table(serology$Cohort, serology$TimeCategory, serology$Year)
## , , = 1
##
##
## baseline late oneWeek
## aPD1 4 4 3
## Healthy 0 0 0
## nonPD1 0 0 0
##
## , , = 2
##
##
## baseline late oneWeek
## aPD1 12 8 3
## Healthy 12 12 12
## nonPD1 3 3 1
temp1 <- merge(x = mergedData, y=serology, all = T, suffixes = c(".Tflow",".Serology"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))
Bflow <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Bflow_freqParent_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
print("Data by year and cohort for B cell flow cytometry")
## [1] "Data by year and cohort for B cell flow cytometry"
table(Bflow$Cohort, Bflow$TimeCategory, Bflow$Year)
## , , = 3
##
##
## baseline late oneWeek
## aPD1 16 13 16
## Healthy 15 7 14
## nonPD1 1 0 1
BflowBATCH <- Bflow[, -c( grep( colnames(Bflow), pattern=paste0(c("Naive","Lymphocytes", "CD4","CD19hi_..FreqParent","CD19hi_CD11c","CD19hi_CD16","CD19hi_CD23","CD1hi9_CD25",
"CD19hi_CD32","CD19hi_CD38lo...FreqParent","CD19hi_CD80","CD19hi_CD86","CD19hi_IgM","CD19hi_CD138",
"CD19hi_CXCR4","CD19hi_CD71","CD19hi_Ki67..CXCR4","CD19hi_Ki67..Ki67","CD38lo...FreqParent",
"CD19hi_Tbet"), collapse ="|")) )] # cut out columns to test for batch fx, else variables > observations
fit <- lm(data=BflowBATCH[ , - which( colnames(BflowBATCH) == "Label" | colnames(BflowBATCH) == "Subject" | colnames(BflowBATCH) == "Cohort" |
colnames(BflowBATCH) == "TimeCategory" | colnames(BflowBATCH) == "TimePoint")],
formula = BflowBatch ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(Bflow) [ grep( paste(a, collapse="|"), colnames(Bflow)) ] <- paste0( colnames(Bflow)[ grep( paste(a, collapse="|"), colnames(Bflow)) ], ".batchEffect" )}
temp2 <- merge(x = temp1, y=Bflow, all = T, suffixes = c(".Tflow",".Bflow"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))
Tmfi <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Tflow_medianFI_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
Tmfi <- Tmfi[, -c(grep( colnames(Tmfi), pattern=paste0(c("Dead","medfi.CD20","medfi.CD4","Freq","ICOSlo"), collapse ="|")))] # cut out columns to test for batch fx, else variables > observations
fit <- lm(data=Tmfi[ , - which( colnames(Tmfi) == "Label" | colnames(Tmfi) == "Subject" | colnames(Tmfi) == "Cohort" |
colnames(Tmfi) == "TimeCategory" | colnames(Tmfi) == "TimePoint")],
formula = TmfiBatch ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(Tmfi) [ grep( paste(a, collapse="|"), colnames(Tmfi)) ] <- paste0( colnames(Tmfi)[ grep( paste(a, collapse="|"), colnames(Tmfi)) ], ".batchEffect" )}
temp3 <- merge(x = temp2, y=Tmfi, all = T, suffixes = c(".one",".Tmedfi"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))
demog <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/clinicalMetadata.csv", stringsAsFactors = F, header=T); demog$TimePoint <- demog$Visit <- NULL
temp4 <- merge(x = temp3, y=demog, all=T, suffixes = c(".two",".demog"), by = c('Subject', 'TimeCategory', 'Year','Cohort'))
Bmfi <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Bflow_Bmfi_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
fit <- lm(data=Bmfi[ , - which( colnames(Bmfi) == "Label" | colnames(Bmfi) == "Subject" | colnames(Bmfi) == "Cohort" |
colnames(Bmfi) == "TimeCategory" | colnames(Bmfi) == "TimePoint")],
formula = BmfiBatch ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(Bmfi) [ grep( paste(a, collapse="|"), colnames(Bmfi)) ] <- paste0( colnames(Bmfi)[ grep( paste(a, collapse="|"), colnames(Bmfi)) ], ".batchEffect" )}
temp5 <- merge(x = temp4, y=Bmfi, all = T, suffixes = c(".one",".TBpmedfi"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))
TBpmfi <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Bflow_TmfiFromB_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
fit <- lm(data=TBpmfi[ , - which( colnames(TBpmfi) == "Label" | colnames(TBpmfi) == "Subject" | colnames(TBpmfi) == "Cohort" |
colnames(TBpmfi) == "TimeCategory" | colnames(TBpmfi) == "TimePoint")],
formula = TfromBBatch ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(TBpmfi) [ grep( paste(a, collapse="|"), colnames(TBpmfi)) ] <- paste0( colnames(TBpmfi)[ grep( paste(a, collapse="|"), colnames(TBpmfi)) ], ".batchEffect" )}
temp6 <- merge(x = temp5, y=TBpmfi, all = T, suffixes = c(".one",".TBpmedfi"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))
mergedData <- temp6
# mergedData <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/allMergedData.csv", stringsAsFactors = F, header = T, row.names = 1)
mergedData$TimeCategory <- factor(mergedData$TimeCategory, levels = c("baseline", "oneWeek","late"))
mergedData$Cohort <- factor(mergedData$Cohort, levels = c("Healthy", "aPD1","nonPD1"))
mergedData$dummy <- "dummy"
rm(Bflow); rm(temp1); rm(temp2); rm(temp3); rm(temp4); rm(temp5); rm(temp6)
# write.csv(mergedData, file = "D:/Pembro-Fluvac/Analysis/mergedData/allMergedData.csv")
dataAvail <- mergedData[ , - grep(paste(c("TimePoint", "dummy", "Visit","Label"), collapse = "|"), colnames(mergedData))]
temp <- reshape(dataAvail, idvar = c('Subject', 'Cohort','Year'), timevar = c('TimeCategory'),direction = 'wide')
saveCohort <- temp[which(temp$Cohort != "nonPD1"),"Cohort"]
saveYear <- temp[which(temp$Cohort != "nonPD1"),"Year"]
simplifiedDataAvail <- temp[which(temp$Cohort != "nonPD1"), c('Subject',
'cTfh_ICOShiCD38hi_CCR6hi...FreqParent.baseline' , # Tflow_freq
'Plasma.CXCL13..pg.mL..baseline' , # Serology
'IgDlo_CD71hi_ActBCells.Tbet....FreqParent.baseline' , # Bflow_freq
'CD19hi_CD27hiCD38hi_CD20lo.PB..._medfi.Foxp3..baseline' , # Tflow_medFI
'Sex.baseline' , # Demographics
'CD20loCD71hi...medfi.Blimp1..baseline')] # Bflow_medFI
rownames(simplifiedDataAvail) <- simplifiedDataAvail$Subject; simplifiedDataAvail$Subject <- NULL;
simplifiedDataAvail <- as.data.frame(!is.na(simplifiedDataAvail)); simplifiedDataAvail <- simplifiedDataAvail * 2
simplifiedDataAvail <- cbind(simplifiedDataAvail, Cohort = (as.numeric(saveCohort)-1)*2, Year = as.numeric(saveYear)-1)
names(simplifiedDataAvail) <- c("Tflow_freq","Serology","Bflow_freq", "Tflow_medFI","Demographics","Bflow_medFI","Cohort", "Year")
pheatmap(as.data.frame(t(simplifiedDataAvail)), cluster_col = F,color=gray.colors(100, start=1,end=0), fontsize_row = 18, fontsize_col = 6, main = "Data available by subject"
# , filename = "D:/Pembro-Fluvac/Analysis/Images/dataAvailable.pdf"
); dev.off()

## null device
## 1
———– Cohort description ——————–
table(mergedData$Cohort, mergedData$Sex, mergedData$Year)/3 # divide by 3 because demog data repeated 3x per subject
## , , = 1
##
##
## Female Male
## Healthy 0.0000000 0.0000000
## aPD1 0.3333333 1.0000000
## nonPD1 0.0000000 0.0000000
##
## , , = 2
##
##
## Female Male
## Healthy 2.0000000 2.0000000
## aPD1 2.0000000 2.0000000
## nonPD1 0.0000000 0.0000000
##
## , , = 3
##
##
## Female Male
## Healthy 2.0000000 3.0000000
## aPD1 1.3333333 4.0000000
## nonPD1 0.0000000 0.0000000
#pdf(file = "D:/Pembro-Fluvac/Analysis/Images/Cycle_of_immunotherapy.pdf")
hist(mergedData[which(mergedData$Cohort == "aPD1" & mergedData$TimeCategory == "baseline"), 'Cycle.of.Immunotherapy'], breaks = 50, col = "grey90",main = "Cycle of IT")

# dev.off()
median(mergedData[which(mergedData$Cohort == "aPD1" & mergedData$TimeCategory == "baseline"), 'Cycle.of.Immunotherapy'], na.rm = T)
## [1] 6.5
———– Global overview of phenotypic differences ——————–
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_NaiveB...FreqParent"))
prePostTimeAveraged(melted, title = "Naive B frequencies averaged", xLabel = NULL, yLabel = "NaiveB (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, CD19hi_NaiveB...FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = CD19hi_NaiveB...FreqParent ~ Cohort + Year + TimeCategory,
## data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.041 -7.960 0.489 12.084 34.620
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.180 3.687 10.354 2.64e-16 ***
## CohortaPD1 14.065 3.900 3.607 0.000545 ***
## CohortnonPD1 -9.451 12.624 -0.749 0.456334
## Year NA NA NA NA
## TimeCategoryoneWeek 4.930 4.369 1.128 0.262604
## TimeCategorylate -4.824 4.984 -0.968 0.336061
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.33 on 78 degrees of freedom
## (78 observations deleted due to missingness)
## Multiple R-squared: 0.1829, Adjusted R-squared: 0.141
## F-statistic: 4.365 on 4 and 78 DF, p-value: 0.003083
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_Ki67....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+Ki67+ frequencies averaged", xLabel = NULL, yLabel = "CD19+Ki67+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, CD19hi_Ki67....FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = CD19hi_Ki67....FreqParent ~ Cohort + Year + TimeCategory,
## data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6583 -0.9559 -0.3589 0.5265 8.7111
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.4238 0.3259 4.369 3.8e-05 ***
## CohortaPD1 0.2651 0.3446 0.769 0.444
## CohortnonPD1 -0.1634 1.1157 -0.146 0.884
## Year NA NA NA NA
## TimeCategoryoneWeek -0.1708 0.3861 -0.442 0.659
## TimeCategorylate 0.3994 0.4404 0.907 0.367
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.532 on 78 degrees of freedom
## (78 observations deleted due to missingness)
## Multiple R-squared: 0.0328, Adjusted R-squared: -0.0168
## F-statistic: 0.6614 on 4 and 78 DF, p-value: 0.6207
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_CD71....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+CD71+ frequencies averaged", xLabel = NULL, yLabel = "CD19+CD71+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, CD19hi_CD71....FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = CD19hi_CD71....FreqParent ~ Cohort + Year + TimeCategory,
## data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0318 -1.1309 -0.4282 0.8963 9.0763
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.46818 0.41401 5.962 6.86e-08 ***
## CohortaPD1 -0.44450 0.43785 -1.015 0.3132
## CohortnonPD1 -0.06632 1.41735 -0.047 0.9628
## Year NA NA NA NA
## TimeCategoryoneWeek 0.25628 0.49053 0.522 0.6028
## TimeCategorylate 1.01110 0.55956 1.807 0.0746 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.946 on 78 degrees of freedom
## (78 observations deleted due to missingness)
## Multiple R-squared: 0.04905, Adjusted R-squared: 0.0002855
## F-statistic: 1.006 on 4 and 78 DF, p-value: 0.4097
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_CD80....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+CD80+ frequencies averaged", xLabel = NULL, yLabel = "CD19+CD80+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, CD19hi_CD80....FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = CD19hi_CD80....FreqParent ~ Cohort + Year + TimeCategory,
## data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7011 -0.9985 -0.3846 0.7051 3.9340
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.33110 0.33397 9.974 1.41e-15 ***
## CohortaPD1 -0.76105 0.35319 -2.155 0.0343 *
## CohortnonPD1 -1.22854 1.14331 -1.075 0.2859
## Year NA NA NA NA
## TimeCategoryoneWeek 0.17487 0.39569 0.442 0.6598
## TimeCategorylate 0.01458 0.45137 0.032 0.9743
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.57 on 78 degrees of freedom
## (78 observations deleted due to missingness)
## Multiple R-squared: 0.06555, Adjusted R-squared: 0.01763
## F-statistic: 1.368 on 4 and 78 DF, p-value: 0.2528
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_CD86....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+CD86+ frequencies averaged", xLabel = NULL, yLabel = "CD19+CD86+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, CD19hi_CD86....FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = CD19hi_CD86....FreqParent ~ Cohort + Year + TimeCategory,
## data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.127 -3.121 -1.199 1.198 38.723
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.5771 1.2860 2.004 0.0485 *
## CohortaPD1 3.1425 1.3600 2.311 0.0235 *
## CohortnonPD1 -1.2056 4.4026 -0.274 0.7849
## Year NA NA NA NA
## TimeCategoryoneWeek 1.0570 1.5237 0.694 0.4899
## TimeCategorylate 0.1353 1.7381 0.078 0.9381
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.045 on 78 degrees of freedom
## (78 observations deleted due to missingness)
## Multiple R-squared: 0.07476, Adjusted R-squared: 0.02731
## F-statistic: 1.576 on 4 and 78 DF, p-value: 0.1891
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_IgM....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+IgM+ frequencies averaged", xLabel = NULL, yLabel = "CD19+IgM+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_CD138....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+CD138+ frequencies averaged", xLabel = NULL, yLabel = "CD19+CD138+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_Tbet....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+Tbet+ frequencies averaged", xLabel = NULL, yLabel = "CD19+Tbet+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..ICOShi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+ICOS+ frequencies averaged", xLabel = NULL, yLabel = "CD4+ICOS+ (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..ICOShi...FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = Live_CD3..CD4..ICOShi...FreqParent ~ Cohort + Year +
## TimeCategory, data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0105 -1.6260 -0.0323 1.2164 9.1895
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.09149 1.23720 2.499 0.0138 *
## CohortaPD1 6.90950 0.54832 12.601 <2e-16 ***
## CohortnonPD1 2.05412 1.41132 1.455 0.1480
## Year 1.13692 0.45151 2.518 0.0131 *
## TimeCategoryoneWeek -0.26431 0.60293 -0.438 0.6619
## TimeCategorylate 0.09752 0.71099 0.137 0.8911
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.045 on 125 degrees of freedom
## (30 observations deleted due to missingness)
## Multiple R-squared: 0.5851, Adjusted R-squared: 0.5685
## F-statistic: 35.26 on 5 and 125 DF, p-value: < 2.2e-16
subsetData <- subset(mergedData, Cohort != "nonPD1" & Year == 3 ) # given strong influence of Year in the linear model
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..CTLA4hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+CTLA4+ frequencies averaged", xLabel = NULL, yLabel = "CD4+CTLA4+ (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..CTLA4hi...FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = Live_CD3..CD4..CTLA4hi...FreqParent ~ Cohort + Year +
## TimeCategory, data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5064 -2.3519 -0.4784 1.7747 12.6697
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.8721 1.5939 -4.939 2.46e-06 ***
## CohortaPD1 4.6061 0.7064 6.520 1.56e-09 ***
## CohortnonPD1 0.1421 1.8182 0.078 0.938
## Year 5.4675 0.5817 9.399 3.38e-16 ***
## TimeCategoryoneWeek 1.1420 0.7768 1.470 0.144
## TimeCategorylate 0.7163 0.9160 0.782 0.436
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.922 on 125 degrees of freedom
## (30 observations deleted due to missingness)
## Multiple R-squared: 0.5482, Adjusted R-squared: 0.5301
## F-statistic: 30.33 on 5 and 125 DF, p-value: < 2.2e-16
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..CD38hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+CD38hi frequencies averaged", xLabel = NULL, yLabel = "CD4+CD38hi (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..CD38hi...FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = Live_CD3..CD4..CD38hi...FreqParent ~ Cohort + Year +
## TimeCategory, data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.1317 -1.9852 -0.7808 1.4314 10.4204
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.9334 1.4565 6.133 1.04e-08 ***
## CohortaPD1 -3.8999 0.6455 -6.042 1.62e-08 ***
## CohortnonPD1 -4.7783 1.6615 -2.876 0.00474 **
## Year 0.4487 0.5315 0.844 0.40018
## TimeCategoryoneWeek -0.4178 0.7098 -0.589 0.55715
## TimeCategorylate -0.7245 0.8370 -0.866 0.38840
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.584 on 125 degrees of freedom
## (30 observations deleted due to missingness)
## Multiple R-squared: 0.2485, Adjusted R-squared: 0.2185
## F-statistic: 8.269 on 5 and 125 DF, p-value: 8.93e-07
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..Foxp3hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+Foxp3hi frequencies averaged", xLabel = NULL, yLabel = "CD4+Foxp3hi (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..Foxp3hi...FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = Live_CD3..CD4..Foxp3hi...FreqParent ~ Cohort + Year +
## TimeCategory, data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.6337 -1.8716 0.0331 1.6646 6.8068
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.7245 0.9393 6.094 1.26e-08 ***
## CohortaPD1 1.3958 0.4163 3.353 0.00106 **
## CohortnonPD1 -0.4404 1.0715 -0.411 0.68180
## Year -0.4711 0.3428 -1.374 0.17187
## TimeCategoryoneWeek -0.5445 0.4578 -1.189 0.23651
## TimeCategorylate 1.0108 0.5398 1.872 0.06348 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.312 on 125 degrees of freedom
## (30 observations deleted due to missingness)
## Multiple R-squared: 0.1554, Adjusted R-squared: 0.1217
## F-statistic: 4.601 on 5 and 125 DF, p-value: 0.0006906
summary(lm( data = mergedData, Live_CD3..CD4..Foxp3hi...FreqParent ~ Cohort + Year + TimeCategory + Live_CD3..CD4..CTLA4hi...FreqParent + Live_CD3..CD4..ICOShi...FreqParent))
##
## Call:
## lm(formula = Live_CD3..CD4..Foxp3hi...FreqParent ~ Cohort + Year +
## TimeCategory + Live_CD3..CD4..CTLA4hi...FreqParent + Live_CD3..CD4..ICOShi...FreqParent,
## data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.3134 -1.4895 0.0339 1.5967 7.2888
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.01145 1.00219 6.996 1.49e-10 ***
## CohortaPD1 -0.14411 0.59563 -0.242 0.809226
## CohortnonPD1 -0.65404 1.00563 -0.650 0.516662
## Year -1.66127 0.41666 -3.987 0.000114 ***
## TimeCategoryoneWeek -0.74781 0.43072 -1.736 0.085032 .
## TimeCategorylate 0.85949 0.50343 1.707 0.090299 .
## Live_CD3..CD4..CTLA4hi...FreqParent 0.19892 0.05069 3.924 0.000144 ***
## Live_CD3..CD4..ICOShi...FreqParent 0.09026 0.06530 1.382 0.169433
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.151 on 123 degrees of freedom
## (30 observations deleted due to missingness)
## Multiple R-squared: 0.2808, Adjusted R-squared: 0.2398
## F-statistic: 6.859 on 7 and 123 DF, p-value: 6.995e-07
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..Ki67hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+Ki67hi frequencies averaged", xLabel = NULL, yLabel = "CD4+Ki67hi (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..Ki67hi...FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = Live_CD3..CD4..Ki67hi...FreqParent ~ Cohort + Year +
## TimeCategory, data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0387 -1.0913 -0.2433 0.7418 9.0418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.952449 0.668557 5.912 3.01e-08 ***
## CohortaPD1 0.280504 0.296302 0.947 0.3456
## CohortnonPD1 -0.690810 0.762648 -0.906 0.3668
## Year -0.632018 0.243987 -2.590 0.0107 *
## TimeCategoryoneWeek 0.069804 0.325811 0.214 0.8307
## TimeCategorylate 0.006414 0.384207 0.017 0.9867
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.645 on 125 degrees of freedom
## (30 observations deleted due to missingness)
## Multiple R-squared: 0.06121, Adjusted R-squared: 0.02366
## F-statistic: 1.63 on 5 and 125 DF, p-value: 0.1568
———– phenotypic differences ——————–
# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_..FreqParent <- subsetData$cTfh_ICOShiCD38hi_..FreqParent * subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /10000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_..FreqParent"))
prePostTimeAveraged(melted, title = "+/+ cTfh frequencies averaged", xLabel = NULL, yLabel = "+/+ cTfh (freq CD4)")

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_CTLA4hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_CTLA4hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent *
subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CTLA4hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ CTLA4hi frequencies averaged", xLabel = NULL, yLabel = "+/+ CTLA4hi (freq CD4)")

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_CTLA4hi...FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = cTfh_ICOShiCD38hi_CTLA4hi...FreqParent ~ Cohort +
## Year + TimeCategory, data = subsetData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35023 -0.10025 -0.02990 0.06669 0.91298
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.147508 0.073348 2.011 0.04654 *
## CohortaPD1 0.108438 0.032160 3.372 0.00100 **
## Year -0.003298 0.026809 -0.123 0.90229
## TimeCategoryoneWeek 0.100877 0.036072 2.797 0.00601 **
## TimeCategorylate -0.005669 0.042440 -0.134 0.89396
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1785 on 121 degrees of freedom
## (26 observations deleted due to missingness)
## Multiple R-squared: 0.1478, Adjusted R-squared: 0.1196
## F-statistic: 5.247 on 4 and 121 DF, p-value: 0.0006239
# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & Year == 3)
subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent *
subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_Ki67hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ Ki67hi frequencies averaged", xLabel = NULL, yLabel = "+/+ Ki67hi (freq CD4)")

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort + Year + TimeCategory)) # mergedData because of influence of Year
##
## Call:
## lm(formula = cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort +
## Year + TimeCategory, data = subsetData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17838 -0.07199 -0.02214 0.03502 0.79451
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.111944 0.028914 3.872 0.000236 ***
## CohortaPD1 0.015299 0.031512 0.486 0.628796
## Year NA NA NA NA
## TimeCategoryoneWeek 0.057102 0.033808 1.689 0.095549 .
## TimeCategorylate 0.005275 0.043694 0.121 0.904237
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1331 on 72 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.04414, Adjusted R-squared: 0.004312
## F-statistic: 1.108 on 3 and 72 DF, p-value: 0.3515
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent *
subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_Ki67hi...FreqParent", fillParam="Cohort", title="Ki67 in ICOS+CD38+ cTfh at oW",
yLabel="+/+ Ki67hi (freq CD4) ")

twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_cTfh..._medfi.Ki67.", fillParam="Cohort", title="Ki67 in ICOS+CD38+ cTfh at oW",
yLabel="+/+ for medianFI of Ki67")
## Warning: Removed 18 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_CXCR3hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_CXCR3hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent *
subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CXCR3hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ CXCR3hi frequencies averaged", xLabel = NULL, yLabel = "+/+ CXCR3hi (freq CD4)")

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_CXCR3hi...FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = cTfh_ICOShiCD38hi_CXCR3hi...FreqParent ~ Cohort +
## Year + TimeCategory, data = subsetData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32735 -0.09347 -0.02113 0.04789 1.09346
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.191102 0.075829 2.520 0.013032 *
## CohortaPD1 0.069720 0.033248 2.097 0.038078 *
## Year -0.040554 0.027715 -1.463 0.145999
## TimeCategoryoneWeek 0.147631 0.037292 3.959 0.000128 ***
## TimeCategorylate 0.001777 0.043876 0.041 0.967752
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1846 on 121 degrees of freedom
## (26 observations deleted due to missingness)
## Multiple R-squared: 0.1667, Adjusted R-squared: 0.1392
## F-statistic: 6.051 on 4 and 121 DF, p-value: 0.0001792
# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_CD25hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_CD25hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent *
subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CD25hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ CD25hi frequencies averaged", xLabel = NULL, yLabel = "+/+ CD25hi (freq CD4)")

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_CD25hi...FreqParent ~ Cohort + Year + TimeCategory))
##
## Call:
## lm(formula = cTfh_ICOShiCD38hi_CD25hi...FreqParent ~ Cohort +
## Year + TimeCategory, data = subsetData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.047427 -0.025753 -0.007976 0.009342 0.194646
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.056581 0.016507 3.428 0.000832 ***
## CohortaPD1 -0.003367 0.007238 -0.465 0.642641
## Year -0.011194 0.006033 -1.855 0.065973 .
## TimeCategoryoneWeek 0.010656 0.008118 1.313 0.191769
## TimeCategorylate 0.007734 0.009551 0.810 0.419682
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04018 on 121 degrees of freedom
## (26 observations deleted due to missingness)
## Multiple R-squared: 0.0457, Adjusted R-squared: 0.01415
## F-statistic: 1.449 on 4 and 121 DF, p-value: 0.2222
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_NaiveB...FreqParent"))
prePostTimeAveraged(melted, title = "Naive B frequencies averaged", xLabel = NULL, yLabel = "NaiveB (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

———– PB and Tfh just baseline frequencies ——————–
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: cTfh +/+ at d0", yLabel="ICOS+CD38+ cTfh frequency at d0")
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: PB at d0", yLabel="CD19+IgD-CD27++CD38++ frequency at d0")
## Warning: Removed 28 rows containing non-finite values (stat_boxplot).
## Warning: Removed 28 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_ActBCells...FreqParent", fillParam="Cohort", title="All yrs: ABC at d0", yLabel="ABC (freq of CD19+IgD-CD71+) at d0")
## Warning: Removed 28 rows containing non-finite values (stat_boxplot).
## Warning: Removed 28 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="CD19hi_NaiveB...FreqParent", fillParam="Cohort", title="All yrs: Naive B at d0", yLabel="CD19+IgDhi frequency at d0")
## Warning: Removed 28 rows containing non-finite values (stat_boxplot).
## Warning: Removed 28 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="Live_CD3..CD4..ICOShi...FreqParent", fillParam="Cohort", title="All yrs: CD4+ICOS+ at d0", yLabel="CD4+ICOS+ frequency at d0")
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing missing values (geom_point).

———– PB and Tfh just day 7 frequencies ——————–
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek" & Year == "3" & cTfh_ICOShiCD38hi_..FreqParent > 0)
twoSampleBox(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="Yr3: ICOS+CD38+ cTfh at d7", yLabel="ICOS+CD38+ cTfh frequency at d7") + scale_y_continuous(breaks=seq(1:12), limits=c(0.5,11))

twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="Yr3: ICOS+CD38+ cTfh at d7", yLabel="ICOS+CD38+ cTfh frequency at d7") +
coord_cartesian(ylim = c(0.5,11)) + scale_y_continuous(breaks=seq(1:12))

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek" & Year == "3")
twoSampleBox(data=subsetData, xData="Cohort", yData="CD19_CD27.CD38....FreqParent", fillParam="Cohort", title="Yr3: Plasmablast at d7", yLabel="CD19+CD27++CD38++ frequency at d7") + scale_y_continuous(breaks=seq(0,99,1))

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 0)
twoSampleBox(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: ICOS+CD38+ cTfh at d7", yLabel="ICOS+CD38+ cTfh frequency at d7") + scale_y_continuous(breaks=seq(0,99,1))

twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: ICOS+CD38+ cTfh at d7", yLabel="ICOS+CD38+ cTfh (freq CXCR5+)") +
coord_cartesian(ylim = c(0.5,11)) + scale_y_continuous(breaks=seq(1:12))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResponse_justDay7_AllYrs.pdf", device='pdf', width=7, height=7)
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBox(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: Plasmablast at d7", yLabel="CD19+CD27++CD38++ frequency at d7")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
## Warning: Removed 19 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
subsetData$CD27hiCD38hi_..FreqParent <- subsetData$CD27hiCD38hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 100
twoSampleBox(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: PB (freq CD19) at d7", yLabel="CD19+CD27++CD38++ frequency at d7")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
## Warning: Removed 19 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBox(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_CD20loCD71hi...FreqParent", fillParam="Cohort", title="All yrs: Plasmablast at d7", yLabel="IgDloCD71+CD20lo frequency at d7")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
## Warning: Removed 19 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent <- subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 10000
twoSampleBox(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_CD20loCD71hi...FreqParent", fillParam="Cohort", title="All yrs: Plasmablast at d7", yLabel="IgDloCD71+CD20lo frequency at d7")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
## Warning: Removed 19 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBox(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_ActBCells...FreqParent", fillParam="Cohort", title="All yrs: ABC at d7", yLabel="ABC frequency at d7")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
## Warning: Removed 19 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
subsetData$IgDlo_CD71hi_ActBCells...FreqParent <- subsetData$IgDlo_CD71hi_ActBCells...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 10000
twoSampleBox(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_ActBCells...FreqParent", fillParam="Cohort", title="All yrs: ABC at d7", yLabel="ABC frequency at d7")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
## Warning: Removed 19 rows containing missing values (geom_point).

———– Tfh and PB Fold-change at day 7 ——————–
subsetData <- subset(mergedData, TimeCategory != "late" & Year == "3" & Cohort != "nonPD1")
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent"));
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="Yr3: cTfh response at one week", yLabel="ICOS+CD38+ cTfh fold-change") + scale_y_continuous(breaks=seq(0,99,1))

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & cTfh_ICOShiCD38hi_..FreqParent > 0)
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent"));
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="All yrs: cTfh response at one week", yLabel="ICOS+CD38+ cTfh fold-change") + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).

twoSampleBar(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="All yrs: cTfh response at one week", yLabel="ICOS+CD38+ cTfh fold-change") +
coord_cartesian(ylim = c(0,7)) + scale_y_continuous(breaks=seq(1:12))
## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResponse_foldchange_AllYrs.pdf", device="pdf", width=7, height=7)
aggregate( FC_Tfhresponse, by= list( FC_Tfhresponse$Cohort), FUN=mean, na.rm = T)
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Group.1 Subject Cohort baseline oneWeek FC
## 1 Healthy NA NA 3.894815 4.627778 1.236258
## 2 aPD1 NA NA 3.887727 6.236190 1.884346
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1")
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent"));
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="AllYrs: PB response at one week", yLabel="CD19+CD27++CD38++ (freq. IgD-) fold-change") + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 29 rows containing non-finite values (stat_boxplot).
## Warning: Removed 29 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Year == "3" & Cohort != "nonPD1")
subsetData$CD27hiCD38hi_..FreqParent <- subsetData$CD27hiCD38hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent /100
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent"));
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="Yr3: PB response (freq CD19) at one week", yLabel="CD19+CD27++CD38++ fold-change") + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent > 0)
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent"));
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="All yrs: PB response at one week", yLabel="IgDloCD71+CD20lo fold-change") + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent > 0)
subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent <- subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent"));
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="All yrs: PB response (freq CD19) at one week", yLabel="IgDloCD71+CD20lo fold-change") + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent > 0)
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_ActBCells...FreqParent"));
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="AllYrs: ABC response FC at one week", yLabel="ABC fold-change")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent > 0)
subsetData$IgDlo_CD71hi_ActBCells...FreqParent <- subsetData$IgDlo_CD71hi_ActBCells...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_ActBCells...FreqParent"));
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="AllYrs: ABC response (freq CD19) FC at one week", yLabel="ABC fold-change")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1")
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent"));
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent"));
FC_Tfhresponse$PB <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,],
name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "PB", fillParam = "Cohort", title = "FC PB (freqParent) vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change",
yLabel = "CD19+CD27++CD38++ PB fold-change") + scale_y_continuous(limits=c(0,12))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 16 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" )
subsetData$CD27hiCD38hi_..FreqParent <- subsetData$CD27hiCD38hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent"));
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent"));
FC_Tfhresponse$PB <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,],
name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "PB", fillParam = "Cohort", title = "FC PB (as freq CD19) vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change",
yLabel = "CD19+CD27++CD38++ PB fold-change") + scale_y_continuous(limits=c(0,12))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent > 0 )
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent"));
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent"));
FC_Tfhresponse$PB <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,],
name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "PB", fillParam = "Cohort", title = "FC PB vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change",
yLabel = "IgDloCD71+CD20lo PB fold-change")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent > 0 )
subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent <- subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent"));
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent"));
FC_Tfhresponse$PB <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,],
name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "PB", fillParam = "Cohort", title = "FC PB (as freq CD19) vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change",
yLabel = "IgDloCD71+CD20lo PB fold-change") + scale_y_continuous(limits=c(0,12))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent > 0 )
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent"));
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_ActBCells...FreqParent"));
FC_Tfhresponse$ABC <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,],
name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "ABC", fillParam = "Cohort", title = "FC ABC vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change",
yLabel = "IgDloCD71+CD20hi ABC fold-change") + scale_y_continuous(limits=c(0,4))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 26 rows containing missing values (geom_smooth).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent > 0 )
subsetData$IgDlo_CD71hi_ActBCells...FreqParent <- subsetData$IgDlo_CD71hi_ActBCells...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent"));
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_ActBCells...FreqParent"));
FC_Tfhresponse$ABC <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,],
name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "ABC", fillParam = "Cohort", title = "FC ABC (as freq CD19) vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change",
yLabel = "IgDloCD71+CD20hi ABC fold-change") + scale_y_continuous(limits=c(0,12))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

———– PB vs cTfh response correlations ——————–
## *************** CD27++CD38++ ********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
subsetData1 <- subset(oneWeek, Cohort == "Healthy" & Year == "3")
subsetData2 <- subset(oneWeek, Cohort == "aPD1" & Year == "3" )
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD27hiCD38hi_..FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "Yr3: cTfh vs PB response at oneWeek", xLabel= "CD27++CD38++ (freq IgD-)", yLabel = "ICOS+CD38+ cTfh frequency") + scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "CD27hiCD38hi_..FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory",
title = "AllYrs: cTfh vs CD27++CD38++ at oneWeek", xLabel= "CD27++CD38++ (freq IgD-)", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).

fit <- lm(subsetData1$CD27hiCD38hi_..FreqParent ~ subsetData1$cTfh_ICOShiCD38hi_..FreqParent)
fit <- lm(subsetData1$CD27hiCD38hi_..FreqParent ~ subsetData1$cTfh_ICOShiCD38hi_..FreqParent)
outlier <- which(cooks.distance(fit) == max(cooks.distance(fit))) # one row identified with high Cook's distance
bivScatter(data1 = subsetData1[ -as.numeric(names(outlier)), ], data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD27hiCD38hi_..FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "Yr3: cTfh vs PB response at oneWeek", xLabel= "Plasmablast (freq CD19+IgD-)", yLabel = "ICOS+CD38+ cTfh frequency") +
scale_x_continuous(breaks=seq(0,99,1)) + scale_y_continuous(breaks=seq(0,99,2))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_PB-27-38-d7_freqCD71.pdf", device="pdf")
# ************** different denominator *********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$CD27hiCD38hi_..FreqParent <- oneWeek$CD27hiCD38hi_..FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000
subsetData1 <- subset(oneWeek, Cohort == "Healthy" )
subsetData2 <- subset(oneWeek, Cohort == "aPD1" & cTfh_ICOShiCD38hi_..FreqParent > 1) # ***** OUTLIER REMOVED
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD27hiCD38hi_..FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "AllYrs: cTfh vs CD27++CD38++ at oneWeek", xLabel= "CD27++CD38++ (freq CD19)", yLabel = "ICOS+CD38+ cTfh frequency") +
scale_x_continuous(breaks=seq(0,99,0.05)) + scale_y_continuous(breaks=seq(0,99,2))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

fit <- lm(subsetData1$CD27hiCD38hi_..FreqParent ~ subsetData1$cTfh_ICOShiCD38hi_..FreqParent)
outlier <- which(cooks.distance(fit) == max(cooks.distance(fit))) # one row identified with high Cook's distance
# by review of Cook's distance, row 26 in subsetData1 has the highest Cook's distance and will thus exclude
bivScatter(data1 = subsetData1[ -as.numeric(names(outlier)), ], data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD27hiCD38hi_..FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "AllYrs: cTfh vs CD27++CD38++ at oneWeek", xLabel= "CD27++CD38++ (freq CD19)", yLabel = "ICOS+CD38+ cTfh frequency") +
scale_x_continuous(breaks=seq(0,99,0.05)) + scale_y_continuous(breaks=seq(0,99,2))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_PB-27-38-d7_freqCD19.pdf", device="pdf")
univScatter(data = oneWeek, xData = "CD27hiCD38hi_..FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory",
title = "AllYrs: cTfh vs CD27++CD38++ at oneWeek", xLabel= "CD27++CD38++ (freq CD19)", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_PB-27-38-d7_freqCD19_univ.pdf", device="pdf")
## *************** CD71+ CD20loPB ********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
subsetData1 <- subset(oneWeek, Cohort == "Healthy" & Year == "3")
subsetData2 <- subset(oneWeek, Cohort == "aPD1" & Year == "3" )
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "Yr3: cTfh vs CD20lo response at oneWeek", xLabel= "CD71+CD20lo frequency", yLabel = "ICOS+CD38+ cTfh frequency") +
scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1), limits = c(0,15))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory",
title = "Yr3: cTfh vs CD20lo at oneWeek", xLabel= "CD20lo (freq CD71+)", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).

# ************** different denominator *********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$IgDlo_CD71hi_CD20loCD71hi...FreqParent <- oneWeek$IgDlo_CD71hi_CD20loCD71hi...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000
subsetData1 <- subset(oneWeek, Cohort == "Healthy" & Year == "3" )
subsetData2 <- subset(oneWeek, Cohort == "aPD1" & Year == "3" )
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "Yr3: cTfh vs CD20lo response at oneWeek", xLabel= "PB CD71+CD20lo (freq CD19)", yLabel = "ICOS+CD38+ cTfh frequency")# + scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1), limits = c(0,15))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory",
title = "Yr3: cTfh vs CD20lo at oneWeek", xLabel= "PB CD71+CD20lo (freq CD19)", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_PB-71hi-20lo-d7_freqCD19_univ.pdf", device="pdf")
## *************** CD71+ CD20hi ABC ********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
subsetData1 <- subset(oneWeek, Cohort == "Healthy" & Year == "3" )
subsetData2 <- subset(oneWeek, Cohort == "aPD1" & Year == "3" )
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_ActBCells...FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "Yr3: cTfh vs ABC at oneWeek", xLabel= "ABC CD20hi (freq CD71+)", yLabel = "ICOS+CD38+ cTfh frequency") +
scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1), limits = c(0,15))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_ActBCells...FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory",
title = "Yr3: cTfh vs CD20hi at oneWeek", xLabel= "ABC CD20hi (freq CD71+)", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).

# ************** different denominator *********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$IgDlo_CD71hi_ActBCells...FreqParent <- oneWeek$IgDlo_CD71hi_ActBCells...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000
subsetData1 <- subset(oneWeek, Cohort == "Healthy" & Year == "3" )
subsetData2 <- subset(oneWeek, Cohort == "aPD1" & Year == "3" )
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_ActBCells...FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "cTfh vs ABC at oneWeek", xLabel= "ABC CD20hi (freq CD19)", yLabel = "ICOS+CD38+ cTfh frequency") #+ scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1), limits=c(0,12))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_ActBCells...FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory",
title = "Yr3: cTfh vs CD20hi at oneWeek", xLabel= "ABC CD20hi (freq CD19)", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_ABC-71hi-20hi-d7_freqCD19_univ.pdf", device="pdf")
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
subsetData1 <- subset(oneWeek, Cohort == "Healthy" )
subsetData2 <- subset(oneWeek, Cohort == "aPD1" & cTfh_ICOShiCD38hi_..FreqParent > 1) # ***** OUTLIER REMOVED
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD27hiCD38hi_..FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "AllYrs: cTfh vs PB response at oneWeek", xLabel= "Plasmablast frequency", yLabel = "ICOS+CD38+ cTfh frequency") + scale_x_continuous(breaks=seq(0,99,1)) + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

# ************** different denominator *********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$CD27hiCD38hi_..FreqParent <- oneWeek$CD27hiCD38hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent /100
subsetData1 <- subset(oneWeek, Cohort == "Healthy" & Year == "3" )
subsetData2 <- subset(oneWeek, Cohort == "aPD1" & Year == "3" & cTfh_ICOShiCD38hi_..FreqParent > 1) # ***** OUTLIER REMOVED
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD27hiCD38hi_..FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "cTfh vs PB response at oneWeek", xLabel= "Plasmablast frequency (freq CD19)", yLabel = "ICOS+CD38+ cTfh frequency") #+ scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1") # ***** OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy") ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_ActBCells...FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "AllYrs: cTfh vs ActivB response at oneWeek", xLabel= "CD20hi (freq CD71+)", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_ActBCells...FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory",
title = "AllYrs: cTfh vs CD20hi response at oneWeek", xLabel= "CD20hi (freq CD71+)", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

cTfhCorrel <- function( data, subset )
{
z <- vector()
z[1] <- subset
z[2] <- round(cor(data[ which(data$Cohort == "Healthy"), subset], data[ which(data$Cohort == "Healthy") ,"cTfh_ICOShiCD38hi_..FreqParent"], method = "pearson", use = "complete.obs"), 2)
z[3] <- round(cor(data[ which(data$Cohort == "aPD1"), subset], data[ which(data$Cohort == "aPD1"),"cTfh_ICOShiCD38hi_..FreqParent"], method = "pearson", use = "complete.obs"), 2)
return(z)
}
result <- data.frame(CellType = 0, Healthy = 0, aPD1 = 0)
data <- oneWeek
result[1,] <- cTfhCorrel(data, subset = "IgDlo_CD71hi_ActBCells...FreqParent")
result[2,] <- cTfhCorrel(data, subset = "IgDlo_CD71hi_CD20loCD71hi...FreqParent")
result[3,] <- cTfhCorrel(data, subset = "CD27hiCD38hi_..FreqParent")
result$CellType <- str_replace(result$CellType, "IgDlo_CD71hi_ActBCells...FreqParent", "Activ B cells")
result$CellType <- str_replace(result$CellType, "IgDlo_CD71hi_CD20loCD71hi...FreqParent", "CD20lo ASC")
result$CellType <- str_replace(result$CellType, "CD27hiCD38hi_..FreqParent", "CD27++CD38++ ASC")
result <- melt(result, id.vars = "CellType", measure.vars = c("Healthy","aPD1")); result$value <- as.numeric(result$value)
result$CellType <- factor(result$CellType, levels = c("CD27++CD38++ ASC", "CD20lo ASC", "Activ B cells"))
ggplot(result, aes(x=CellType, fill=variable, color="bl")) + geom_bar(aes(y = value), stat='identity', position="dodge" ) + theme_bw() +
scale_fill_manual(values=c("#7FAEDB", "#FFB18C")) + scale_color_manual(values="black") + ggtitle("Pearson correl with\n ICOS+CD38+ cTfh at oneWeek") + ylab("Pearson r") +
theme(axis.text.x = element_text(size=12,hjust = 1, angle = 45), axis.title = element_text(size=12,hjust = 0.5), plot.title = element_text(size=14,hjust = 0.5),
axis.title.x = element_blank(), axis.text.y = element_text(size=12), legend.position = "none")+ scale_y_continuous(breaks = seq(-1,1,0.2), limits=c(-1,1))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_various-d7_pearsons.pdf", device = "pdf", width=3, height=5)
———– Averaged frequencies over time ——————–
subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(subsetData, xData = "TimeCategory", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "Cohort", title = "cTfh response over time", xLabel = "TimeCategory",
yLabel = "ICOS+CD38+ cTfh frequency", groupby = "Subject") + scale_y_continuous(breaks=seq(0,150,5)) # limits = c(0,45)
## Warning: Removed 23 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()
## No summary function supplied, defaulting to `mean_se()
## Warning: Removed 23 rows containing missing values (geom_path).
## Warning: Removed 23 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(subsetData, xData = "TimeCategory", yData = "CD19_CD27.CD38....FreqParent", fillParam = "Cohort", title = "PB response over time", xLabel = "TimeCategory",
yLabel = "Plasmablast frequency", groupby = "Subject") + scale_y_continuous(breaks=seq(0,150,5)) # limits = c(0,45)
## Warning: Removed 23 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()
## No summary function supplied, defaulting to `mean_se()
## Warning: Removed 23 rows containing missing values (geom_path).
## Warning: Removed 23 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_..FreqParent"))
prePostTimeAveraged(melted, title = "cTfh responses averaged", xLabel = NULL, yLabel = "Average ICOS+CD38+ cTfh frequency") + scale_y_continuous(breaks=seq(0,99,0.5))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_..FreqParent"))
prePostTimeAveraged(melted, title = "CD19+ frequencies averaged", xLabel = NULL, yLabel = "CD19+ (freq of CD14-CD4- live)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD27hiCD38hi_..FreqParent"))
prePostTimeAveraged(melted, title = "Plasmablast responses averaged", xLabel = NULL, yLabel = "CD27++CD38++ Plasmablast frequency") + scale_y_continuous(breaks=seq(0,99,0.25))

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$CD27hiCD38hi_..FreqParent <- subsetData$CD27hiCD38hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent /100
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD27hiCD38hi_..FreqParent"))
prePostTimeAveraged(melted, title = "Plasmablast responses averaged", xLabel = NULL, yLabel = "CD27++CD38++ Plasmablast frequency") + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent"))
prePostTimeAveraged(melted, title = "IgDloCD71+CD20lo responses averaged", xLabel = NULL, yLabel = "Average IgDloCD71+CD20lo frequency") + scale_y_continuous(breaks=seq(0,99,3))

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent <- subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 10000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent"))
prePostTimeAveraged(melted, title = "IgDloCD71+CD20lo responses averaged", xLabel = NULL, yLabel = "Average IgDloCD71+CD20lo frequency") #+ scale_y_continuous(breaks=seq(0,99,3))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("IgDlo_CD71hi_ActBCells...FreqParent"))
prePostTimeAveraged(melted, title = "IgDloCD71+CD20+ ABC responses averaged", xLabel = NULL, yLabel = "Average IgDloCD71+CD20+ frequency") + scale_y_continuous(breaks=seq(0,99,3))

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$IgDlo_CD71hi_ActBCells...FreqParent <- subsetData$IgDlo_CD71hi_ActBCells...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 10000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("IgDlo_CD71hi_ActBCells...FreqParent"))
prePostTimeAveraged(melted, title = "IgDloCD71+CD20+ ABC responses averaged", xLabel = NULL, yLabel = "Average IgDloCD71+CD20+ frequency") #+ scale_y_continuous(breaks=seq(0,99,3))

———– Vaccine response determinants and biomarkers ——————–
# correlate HAI vs cycle of IT
subsetData <- subset(mergedData, cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1") # ***** OUTLIER REMOVED
univScatter(data = subsetData, xData = "HAI.titer..H1N1pdm09.", yData = "Cycle.of.Immunotherapy", fillParam = "dummy",
title = "AllYrs: Cycle of IT vs HAI titer", xLabel= "HAI titer H1N1pdm09", yLabel = "Cycle of immunotherapy")
## Warning: Removed 120 rows containing non-finite values (stat_smooth).
## Warning: Removed 120 rows containing missing values (geom_point).

univScatter(data = subsetData, xData = "cTfh_ICOShiCD38hi_..FreqParent", yData = "Cycle.of.Immunotherapy", fillParam = "dummy",
title = "AllYrs: Cycle of IT vs +/+ cTfh at bL", xLabel= "ICOS+CD38+ cTfh", yLabel = "Cycle of immunotherapy") + coord_cartesian(xlim= c(0,6))
## Warning: Removed 105 rows containing non-finite values (stat_smooth).
## Warning: Removed 105 rows containing missing values (geom_point).

summary(fit <- lm(Cycle.of.Immunotherapy ~ cTfh_ICOShiCD38hi_..FreqParent, subsetData))
##
## Call:
## lm(formula = Cycle.of.Immunotherapy ~ cTfh_ICOShiCD38hi_..FreqParent,
## data = subsetData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.706 -4.822 -1.891 2.732 18.037
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.877 6.991 2.843 0.0117 *
## cTfh_ICOShiCD38hi_..FreqParent -2.550 1.786 -1.428 0.1726
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.531 on 16 degrees of freedom
## (105 observations deleted due to missingness)
## Multiple R-squared: 0.113, Adjusted R-squared: 0.05758
## F-statistic: 2.039 on 1 and 16 DF, p-value: 0.1726
FC_response <- dcast( subset(mergedData, TimeCategory != "late" & cTfh_ICOShiCD38hi_..FreqParent > 0), `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent"))
FC_response$FCtfh <- FC_response$`oneWeek`/FC_response$`baseline`; FC_response$Cohort <- NULL
subsetData <- merge(x=FC_response, y=subsetData, by = "Subject")
univScatter(data = subsetData, xData = "FCtfh", yData = "Cycle.of.Immunotherapy", fillParam = "dummy",
title = "AllYrs: Cycle of IT vs +/+ cTfh FC", xLabel= "ICOS+CD38+ cTfh FoldChange", yLabel = "Cycle of immunotherapy")
## Warning: Removed 105 rows containing non-finite values (stat_smooth).
## Warning: Removed 105 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1")
FC_response <- dcast( subset(mergedData, TimeCategory != "oneWeek"), `Subject`+`Cohort`~`TimeCategory`, value.var = c("HAI.titer..H1N1pdm09."))
FC_response$FChai <- FC_response$`late`/FC_response$`baseline`; FC_response$Cohort <- NULL
subsetData <- merge(x=FC_response, y=subsetData, by = "Subject")
univScatter(data = subsetData, xData = "FChai", yData = "Cycle.of.Immunotherapy", fillParam = "dummy",
title = "Cycle of IT vs HAI titer FC", xLabel= "HAI titer H1N1pdm09 FoldChange", yLabel = "Cycle of immunotherapy") + coord_cartesian(xlim = c(0,10))
## Warning: Removed 144 rows containing non-finite values (stat_smooth).
## Warning: Removed 144 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CycleIT_vs_HAItiterFC.pdf", device='pdf', width=6, height=6)
subsetData <- subset(mergedData, TimeCategory != "oneWeek" & Cohort != "nonPD1" )
FC_response <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("HAI.titer..H1N1pdm09."))
FC_response$FChai <- FC_response$`late`/FC_response$`baseline`
FC_response2 <- dcast( subset(mergedData, TimeCategory != "late" & cTfh_ICOShiCD38hi_..FreqParent > 0), `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent"))
FC_response2$FCtfh <- FC_response2$`oneWeek`/FC_response2$`baseline`; FC_response2$Cohort <- NULL
FC_response <- merge(x=FC_response, y=FC_response2, by = "Subject")
bivScatter(data1 = FC_response[which(FC_response$Cohort == "Healthy"),], data2 = FC_response[ which(FC_response$Cohort == "aPD1") ,],
name1 = "Healthy", name2 = "aPD1", xData = "FCtfh", yData = "FChai", fillParam = "Cohort", title = "FC HAI vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change",
yLabel = "H1N1pdm09 titer fold-change") + scale_y_continuous(limits=c(1,128), trans = "log2")
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 50 rows containing missing values (geom_smooth).
## Warning: Removed 45 rows containing missing values (geom_smooth).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("HAI.titer..H1N1pdm09.")); melted <- melted[ which( !is.na(melted$value) ),]
overTime <- aggregate( melted$value, by= list(melted$variable, melted$TimeCategory, melted$Cohort), FUN=mean, na.rm = T); overTime
## Group.1 Group.2 Group.3 x
## 1 HAI.titer..H1N1pdm09. baseline Healthy 74.66667
## 2 HAI.titer..H1N1pdm09. oneWeek Healthy 130.66667
## 3 HAI.titer..H1N1pdm09. late Healthy 114.66667
## 4 HAI.titer..H1N1pdm09. baseline aPD1 76.00000
## 5 HAI.titer..H1N1pdm09. oneWeek aPD1 153.60000
## 6 HAI.titer..H1N1pdm09. late aPD1 306.66667
aPD1_seroprot <- length(which(melted$value > 40 & melted$Cohort == "aPD1" & melted$TimeCategory == "late") )
aPD1_total <- length(which(melted$Cohort == "aPD1" & melted$TimeCategory == "late") )
HC_seroprot <- length(which(melted$value > 40 & melted$Cohort == "Healthy" & melted$TimeCategory == "late") )
HC_total <- length(which(melted$Cohort == "Healthy" & melted$TimeCategory == "late") )
continTable <- matrix(c(aPD1_seroprot, aPD1_total - aPD1_seroprot, HC_seroprot, HC_total - HC_seroprot), ncol=2); continTable
## [,1] [,2]
## [1,] 11 9
## [2,] 1 3
fisher.test(continTable)
##
## Fisher's Exact Test for Count Data
##
## data: continTable
## p-value = 0.5901
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.2320005 209.3558590
## sample estimates:
## odds ratio
## 3.480296
seroprot <- data.frame( Cohort = c("Healthy", "aPD1"), Seroprot = c(HC_seroprot / HC_total, aPD1_seroprot / aPD1_total))
seroprot$Cohort <- factor(seroprot$Cohort, levels = c("Healthy", "aPD1"))
ggplot(data=seroprot, aes(x=Cohort, y=Seroprot, fill=Cohort)) + scale_fill_manual(values = c("#7FAEDB", "#FFB18C")) +
geom_bar( position = position_dodge(), stat = "identity") + ggtitle("Seroprotection") + ylab("Proportion seroprotected") + theme_bw() +
theme(axis.text = element_text(size=28,hjust = 0.5), axis.title = element_text(size=28,hjust = 0.5), axis.title.x = element_blank(), plot.title = element_text(size=32,hjust = 0.5)) +
theme(legend.position = "none") + coord_cartesian(ylim = c(0,1)) + scale_y_continuous(breaks = seq(0,1,0.1))

# ggsave (filename = "D:/Pembro-Fluvac/Analysis/Images/Seroprotection_byCohort.pdf", device="pdf", width=6, height = 6)
prePostTimeAveraged(melted, title = "HAI responses", xLabel = NULL, yLabel = "H1N1pdm09 HAI titer") + scale_y_continuous(trans = 'log2', breaks=c(2^(2:14)))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAIresponsesTimeAveraged.pdf", device = "pdf", width=7, height=7)
summary( fit <- aov(value ~ Cohort + TimeCategory, data=melted ) )
## Df Sum Sq Mean Sq F value Pr(>F)
## Cohort 1 72676 72676 2.022 0.1598
## TimeCategory 2 257830 128915 3.587 0.0333 *
## Residuals 65 2336358 35944
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(fit)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = value ~ Cohort + TimeCategory, data = melted)
##
## $Cohort
## diff lwr upr p adj
## aPD1-Healthy 64.9697 -26.28125 156.2206 0.1598243
##
## $TimeCategory
## diff lwr upr p adj
## oneWeek-baseline 80.00000 -59.81844 219.8184 0.3612303
## late-baseline 139.87879 13.38223 266.3753 0.0267711
## late-oneWeek 59.87879 -84.27416 204.0317 0.5818223
subsetData <- subset(melted, TimeCategory == "late")
# subsetData$value <- log2(subsetData$value)
# wilcox.test(subsetData$value ~ subsetData$Cohort)
twoSampleBox(data = subsetData, xData = "Cohort", yData = "value", fillParam = "Cohort", title = "Late H1N1pdm09 HAI titer", yLabel = "H1N1pdm09 HAI titer") +
scale_y_continuous(trans = "log2")

twoSampleBarMelted(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="Late H1N1pdm09 HAI titer", yLabel="H1N1pdm09 HAI titer") +
scale_y_continuous(trans = "log2",breaks=c(2^(2:14))) + coord_cartesian(ylim = c(4,4096))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAItiter_late_byCohort.pdf", device="pdf", width=6, height=6)
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="ICOS+CD38+ cTfh at baseline", yLabel="ICOS+CD38+ cTfh freq")
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing missing values (geom_point).

twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="ICOS+CD38+ cTfh at baseline", yLabel="ICOS+CD38+ cTfh (freq CXCR5+)") +
coord_cartesian(ylim = c(0,12))
## Warning: Removed 10 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh_baseline_freq_byCohort.pdf", device="pdf", width=7, height=7)
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "late")
twoSampleBox(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_..FreqParent", fillParam="Cohort", title="PB response at Late", yLabel="Plasmablast freq - Late")
## Warning: Removed 24 rows containing non-finite values (stat_boxplot).
## Warning: Removed 24 rows containing missing values (geom_point).

———– Scatterplots of HAI titer vs CXCL13 ——————–
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Plasma.CXCL13..pg.mL."))
prePostTimeAveraged(melted, title = "CXCL13 responses", xLabel = NULL, yLabel = "Plasma CXCL13 (pg/mL)") + scale_y_continuous(breaks=seq(0,99,2))

summary( fit <- aov(value ~ Cohort*TimeCategory, data=melted ) )
## Df Sum Sq Mean Sq F value Pr(>F)
## Cohort 1 16 15.5 0.045 0.832
## TimeCategory 2 105 52.6 0.154 0.857
## Cohort:TimeCategory 2 288 143.8 0.421 0.658
## Residuals 63 21508 341.4
## 83 observations deleted due to missingness
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_TimeAveraged.pdf", device="pdf", width=7, height=7)
subsetData <- subset(melted, TimeCategory == "oneWeek")
rownames(subsetData) <- seq(1:nrow(subsetData))
twoSampleBox(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="CXCL13 at oneWeek", yLabel="plasma CXCL13 (pg/mL)")
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).
## Warning: Removed 32 rows containing missing values (geom_point).

fit <- lm (value ~ Cohort, subsetData)
outlier <- which(cooks.distance(fit) == max(cooks.distance(fit))) ; subsetData <- subsetData[ - as.numeric( names(outlier)), ] # one row identified with high Cook's distance
subsetData <- subsetData[which(!is.na( subsetData$value)), ]
twoSampleBox(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="CXCL13 at oneWeek", yLabel="plasma CXCL13 (pg/mL)")

twoSampleBar(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="CXCL13 at oneWeek", yLabel="plasma CXCL13 (pg/mL)")

aggregate( subsetData, by= list(subsetData$variable, subsetData$TimeCategory, subsetData$Cohort), FUN=mean, na.rm = T)
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Group.1 Group.2 Group.3 Subject TimeCategory Cohort variable
## 1 Plasma.CXCL13..pg.mL. oneWeek Healthy NA NA NA NA
## 2 Plasma.CXCL13..pg.mL. oneWeek aPD1 NA NA NA NA
## value
## 1 11.52227
## 2 25.08982
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_oW.pdf", device="pdf", height=7, width = 7)
subsetData <- subset(mergedData, Cohort == "aPD1" & TimeCategory == "oneWeek" )
univScatter(subsetData, xData = "HAI.titer..H1N1pdm09.", yData="Plasma.CXCL13..pg.mL.", fillParam = "Cohort",
title = "All yrs aPD1: CXCL13 vs HAI at one week", xLabel= "HAI titer - H1N1pdm09", yLabel = "Plasma CXCL13 (pg/mL)")
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
crossTimeCategory <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c('Plasma.CXCL13..pg.mL.' , 'HAI.titer..H1N1pdm09.') )
crossTimeCategory2 <- dcast(crossTimeCategory, Subject + Cohort ~ TimeCategory + variable)
univScatter(crossTimeCategory2, xData = "baseline_Plasma.CXCL13..pg.mL.", yData="late_HAI.titer..H1N1pdm09.", fillParam = NULL,
title = "All yrs: Late HAI vs baseline CXCL13", xLabel= "baseline Plasma CXCL13 (pg/mL)", yLabel = "Late HAI titer - H1N1pdm09") # nonstd appearance due to lack of fillParam
## Warning: Removed 35 rows containing non-finite values (stat_smooth).
## Warning: Removed 35 rows containing missing values (geom_point).

univScatter(crossTimeCategory2, xData = "oneWeek_Plasma.CXCL13..pg.mL.", yData="late_HAI.titer..H1N1pdm09.", fillParam = NULL,
title = "All yrs: Late HAI vs d7 CXCL13", xLabel= "oneWeek Plasma CXCL13 (pg/mL)", yLabel = "Late HAI titer - H1N1pdm09") # nonstd appearance due to lack of fillParam
## Warning: Removed 42 rows containing non-finite values (stat_smooth).
## Warning: Removed 42 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
crossTimeCategory <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c('cTfh_ICOShiCD38hi_..FreqParent' , 'HAI.titer..H1N1pdm09.') )
crossTimeCategory2 <- dcast(crossTimeCategory, Subject + Cohort ~ TimeCategory + variable)
univScatter(crossTimeCategory2, xData = "baseline_HAI.titer..H1N1pdm09.", yData="late_HAI.titer..H1N1pdm09.", fillParam = NULL,
title = "All yrs: Late HAI vs baseline HAI", xLabel= "baseline HAI titer - H1N1pdm09", yLabel = "Late HAI titer - H1N1pdm09") # nonstd appearance due to lack of fillParam
## Warning: Removed 35 rows containing non-finite values (stat_smooth).
## Warning: Removed 35 rows containing missing values (geom_point).

crossTimeCategory2$FCtfh <- crossTimeCategory2$oneWeek_cTfh_ICOShiCD38hi_..FreqParent / crossTimeCategory2$baseline_cTfh_ICOShiCD38hi_..FreqParent
subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(data = subsetData, xData = "TimeCategory", yData = "HAI.titer..H1N1pdm09.", fillParam = "Cohort", title = "HAI titers over time", xLabel = "TimeCategory",
yLabel = "HAI titer", groupby = "Subject"); a + scale_y_continuous(trans='log2')
## Warning: Removed 83 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()
## No summary function supplied, defaulting to `mean_se()
## Warning: Removed 83 rows containing missing values (geom_path).
## Warning: Removed 83 rows containing missing values (geom_point).

## NULL
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="HAI.titer..H1N1pdm09.", fillParam="Cohort", title="All yrs: HAI titers at d0", yLabel="H1N1pdm09 titer") + scale_y_continuous(trans='log2', breaks=c(2^(2:14) ) )
## Warning: Removed 31 rows containing non-finite values (stat_boxplot).
## Warning: Removed 31 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBox(data=subsetData, xData="Cohort", yData="HAI.titer..H1N1pdm09.", fillParam="Cohort", title="All yrs: HAI titers at d7", yLabel="H1N1pdm09 titer") + scale_y_continuous(trans='log2', breaks=c(2^(2:14) ) )
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).
## Warning: Removed 32 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "late")
twoSampleBox(data=subsetData, xData="Cohort", yData="HAI.titer..H1N1pdm09.", fillParam="Cohort", title="All yrs: HAI titers at d21-28", yLabel="H1N1pdm09 titer") + scale_y_continuous(trans='log2' , breaks=c(2^(2:14) ) )
## Warning: Removed 20 rows containing non-finite values (stat_boxplot).
## Warning: Removed 20 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
seroconv <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("HAI.titer..H1N1pdm09."));
seroconv$FC <- seroconv$`late`/seroconv$`baseline`
twoSampleBox(data=seroconv, xData="Cohort", yData="FC", fillParam="Cohort", title="All yrs: seroconversion", yLabel="Seroconversion factor") + scale_y_continuous(breaks=seq(0,99,4))
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing missing values (geom_point).

twoSampleBar(data=seroconv, xData="Cohort", yData="FC", fillParam="Cohort", title="All yrs: seroconversion", yLabel="Seroconversion factor") +
scale_y_continuous(breaks=seq(0,99,4))
## Warning: Removed 35 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/SeroconversionFactor.pdf", device = "pdf", width=6, height=6)
subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(subsetData, xData = "TimeCategory", yData = "Plasma.CXCL13..pg.mL.", fillParam = "Cohort", title = "CXCL13 over itme", xLabel = "TimeCategory",
yLabel = "HAI titer", groupby = "Subject") + scale_y_continuous(breaks=seq(0,150,5)) # limits = c(0,45)
## Warning: Removed 83 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()
## No summary function supplied, defaulting to `mean_se()
## Warning: Removed 83 rows containing missing values (geom_path).
## Warning: Removed 83 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="Plasma.CXCL13..pg.mL.", fillParam="Cohort", title="All yrs: Plasma CXCL13 at d0", yLabel="Plasma CXCL13 (pg/mL)")
## Warning: Removed 31 rows containing non-finite values (stat_boxplot).
## Warning: Removed 31 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBox(data=subsetData, xData="Cohort", yData="Plasma.CXCL13..pg.mL.", fillParam="Cohort", title="All yrs: Plasma CXCL13 at d7", yLabel="Plasma CXCL13 (pg/mL)")
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).
## Warning: Removed 32 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "late")
twoSampleBox(data=subsetData, xData="Cohort", yData="Plasma.CXCL13..pg.mL.", fillParam="Cohort", title="All yrs: Plasma CXCL13 at d21-28", yLabel="Plasma CXCL13 (pg/mL)")
## Warning: Removed 20 rows containing non-finite values (stat_boxplot).
## Warning: Removed 20 rows containing missing values (geom_point).

summary(lm( Plasma.CXCL13..pg.mL. ~ Cohort + Year + TimeCategory, data=mergedData))
##
## Call:
## lm(formula = Plasma.CXCL13..pg.mL. ~ Cohort + Year + TimeCategory,
## data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.849 -9.952 -3.954 4.118 83.437
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.43559 14.40774 1.210 0.230
## CohortaPD1 1.36682 5.03068 0.272 0.787
## CohortnonPD1 1.81277 7.48578 0.242 0.809
## Year 0.04964 6.71269 0.007 0.994
## TimeCategoryoneWeek 2.13159 5.51850 0.386 0.700
## TimeCategorylate -0.44163 4.75046 -0.093 0.926
##
## Residual standard error: 17.97 on 70 degrees of freedom
## (85 observations deleted due to missingness)
## Multiple R-squared: 0.004192, Adjusted R-squared: -0.06694
## F-statistic: 0.05893 on 5 and 70 DF, p-value: 0.9976
———– subset protein correlations ——————–
# subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
# a <- colnames(subsetData[ , sapply(subsetData, class) == 'character'])
# a <- cor(subsetData[ , - grep(paste(c(a, "Cohort"), collapse="|"), colnames(subsetData))], use="pairwise.complete.obs")
# a[order(a[,20],decreasing = F),20]
oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1") # ***** OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy") ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_CD20loCD71hi.CD32hi...FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "AllYrs: cTfh vs PB CD32hi at oneWeek", xLabel= "CD20lo - CD32hi frequency", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_CD20loCD71hi.CD32hi...FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory",
title = "AllYrs: cTfh vs PB CD32hi at oneWeek", xLabel= "CD20lo - CD32hi frequency", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_PB-71hi-20lo-d7_CD32_univ.pdf")
oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1") # ***** OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy") ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_CD20loCD71hi.CD86....FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "AllYrs: cTfh vs PB CD86 resp at oneWeek", xLabel= "CD20lo - CD86hi frequency", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_CD20loCD71hi.CD86....FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory",
title = "AllYrs: cTfh vs CD20lo CD86+ at oneWeek", xLabel= "CD20lo - CD86hi frequency", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1") # ***** OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy") ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD19_NonnaiveB.CD27.CD38....medfi.CD32.", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "AllYrs: cTfh vs PB CD32 at oneWeek", xLabel= "CD27++CD38++ PB - CD32 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "CD19_NonnaiveB.CD27.CD38....medfi.CD32.", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory",
title = "AllYrs: cTfh vs CD27++CD38++ CD32 at oneWeek", xLabel= "CD27++CD38++ PB - CD32 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs-PB-27hi38hi-d7_CD32medFI_univ.pdf")
# as predicted by the elastic net regression
oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1") # ***** OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy") ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDloCD71hi..medfi.CD86.", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "AllYrs: cTfh vs CD86 resp at oneWeek", xLabel= "IgDloCD71hi - CD86 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDloCD71hi..medfi.CD86.", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "TimeCategory", title = "AllYrs: cTfh vs CD86 resp at oneWeek", xLabel= "IgDloCD71hi - CD86 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("X1.Kd.HA")); melted$value <- 1/melted$value
prePostTimeAveraged(melted, title = "HA EC50/Kd by ELISA", xLabel = NULL, yLabel = "1 / Concentration (ug/mL)") + scale_y_continuous(breaks=seq(0,99,0.25))

summary(fit <- lm( data = mergedData, X1.Kd.HA ~ Cohort + TimeCategory))
##
## Call:
## lm(formula = X1.Kd.HA ~ Cohort + TimeCategory, data = mergedData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7869 -0.7384 -0.4336 0.1578 5.9044
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6462 0.3658 1.767 0.0821 .
## CohortaPD1 0.3859 0.3857 1.000 0.3209
## TimeCategoryoneWeek 1.1021 0.4942 2.230 0.0293 *
## TimeCategorylate 1.0691 0.4351 2.457 0.0168 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.545 on 63 degrees of freedom
## (94 observations deleted due to missingness)
## Multiple R-squared: 0.1159, Adjusted R-squared: 0.07385
## F-statistic: 2.754 on 3 and 63 DF, p-value: 0.04978
summary(fit <- aov(formula = X1.Kd.HA ~ Cohort + TimeCategory, data = mergedData))
## Df Sum Sq Mean Sq F value Pr(>F)
## Cohort 1 0.96 0.958 0.402 0.5285
## TimeCategory 2 18.75 9.377 3.931 0.0246 *
## Residuals 63 150.29 2.386
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 94 observations deleted due to missingness
TukeyHSD(fit)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = X1.Kd.HA ~ Cohort + TimeCategory, data = mergedData)
##
## $Cohort
## diff lwr upr p adj
## aPD1-Healthy 0.2391946 -0.5150433 0.9934325 0.5285435
##
## $TimeCategory
## diff lwr upr p adj
## oneWeek-baseline 1.064122846 -0.09774140 2.225987 0.0792318
## late-baseline 1.061795367 0.01849295 2.105098 0.0451923
## late-oneWeek -0.002327479 -1.20924183 1.204587 0.9999882
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/AntibodyAffinity_overTime.pdf", device="pdf", height=8, width=8)
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"); subsetData$X1.Kd.HA <- 1 / subsetData$X1.Kd.HA
twoSampleBox(data=subsetData, xData="Cohort", yData="X1.Kd.HA", fillParam="Cohort", title="All yrs: HA EC50/Kd at baseline", yLabel="1 / Concentration (ug/mL)")
## Warning: Removed 31 rows containing non-finite values (stat_boxplot).
## Warning: Removed 31 rows containing missing values (geom_point).

twoSampleBar(data=subsetData, xData="Cohort", yData="X1.Kd.HA", fillParam="Cohort", title="All yrs: HA EC50/Kd at baseline", yLabel="1 / Concentration (ug/mL)")
## Warning: Removed 31 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/AntibodyAffinity_baseLine.pdf", device="pdf", height=7, width=7)
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek"); subsetData$X1.Kd.HA <- 1 / subsetData$X1.Kd.HA
twoSampleBox(data=subsetData, xData="Cohort", yData="CD20loCD71hi...medfi.Ki67.", fillParam="Cohort", title="All yrs: at oneWeek", yLabel="medFI Ki67")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
## Warning: Removed 19 rows containing missing values (geom_point).

univScatter(data=subsetData, xData = "CD20loCD71hi...medfi.Ki67.", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "TimeCategory", title = "AllYrs: cTfh vs CD86 resp at oneWeek", xLabel= "IgDloCD71hi - CD86 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).

subsetData1 <- subset(subsetData, Cohort == "Healthy") ; subsetData2 <- subset(subsetData, Cohort == "aPD1")
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD20loCD71hi...medfi.Ki67.", yData="cTfh_ICOShiCD38hi_..FreqParent",
fillParam = "Cohort", title = "AllYrs: cTfh vs CD86 resp at oneWeek", xLabel= "IgDloCD71hi - CD86 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).

twoSampleBox(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_Ki67....FreqParent", fillParam="Cohort", title="All yrs: at oneWeek", yLabel="medFI Ki67")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
## Warning: Removed 19 rows containing missing values (geom_point).
